Source of dataset: https://archive.ics.uci.edu/ml/datasets/individual+household+electric+power+consumption
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import subplots
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from tqdm import tqdm # Shows progress of executing an iterable
import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('household_power_consumption.txt', sep=';')
df
| | Date | Time | Global_active_power | Global_reactive_power | Voltage | Global_intensity | Sub_metering_1 | Sub_metering_2 | Sub_metering_3 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 16/12/2006 | 17:24:00 | 4.216 | 0.418 | 234.840 | 18.400 | 0.000 | 1.000 | 17.0 |
| 1 | 16/12/2006 | 17:25:00 | 5.360 | 0.436 | 233.630 | 23.000 | 0.000 | 1.000 | 16.0 |
| 2 | 16/12/2006 | 17:26:00 | 5.374 | 0.498 | 233.290 | 23.000 | 0.000 | 2.000 | 17.0 |
| 3 | 16/12/2006 | 17:27:00 | 5.388 | 0.502 | 233.740 | 23.000 | 0.000 | 1.000 | 17.0 |
| 4 | 16/12/2006 | 17:28:00 | 3.666 | 0.528 | 235.680 | 15.800 | 0.000 | 1.000 | 17.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2075254 | 26/11/2010 | 20:58:00 | 0.946 | 0.0 | 240.43 | 4.0 | 0.0 | 0.0 | 0.0 |
| 2075255 | 26/11/2010 | 20:59:00 | 0.944 | 0.0 | 240.0 | 4.0 | 0.0 | 0.0 | 0.0 |
| 2075256 | 26/11/2010 | 21:00:00 | 0.938 | 0.0 | 239.82 | 3.8 | 0.0 | 0.0 | 0.0 |
| 2075257 | 26/11/2010 | 21:01:00 | 0.934 | 0.0 | 239.7 | 3.8 | 0.0 | 0.0 | 0.0 |
| 2075258 | 26/11/2010 | 21:02:00 | 0.932 | 0.0 | 239.55 | 3.8 | 0.0 | 0.0 | 0.0 |
2075259 rows × 9 columns
df.shape
(2075259, 9)
# There are no duplicates
df.shape == df.drop_duplicates().shape
True
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2075259 entries, 0 to 2075258
Data columns (total 9 columns):
 #   Column                 Dtype
---  ------                 -----
 0   Date                   object
 1   Time                   object
 2   Global_active_power    object
 3   Global_reactive_power  object
 4   Voltage                object
 5   Global_intensity       object
 6   Sub_metering_1         object
 7   Sub_metering_2         object
 8   Sub_metering_3         float64
dtypes: float64(1), object(8)
memory usage: 142.5+ MB
# Check if there are null values
df.isna().sum()
Date                         0
Time                         0
Global_active_power          0
Global_reactive_power        0
Voltage                      0
Global_intensity             0
Sub_metering_1               0
Sub_metering_2               0
Sub_metering_3           25979
dtype: int64
# Calculate the missing values % contribution in the dataframe
round(100*(df.isnull().sum())/df.shape[0], 2)
Date                     0.00
Time                     0.00
Global_active_power      0.00
Global_reactive_power    0.00
Voltage                  0.00
Global_intensity         0.00
Sub_metering_1           0.00
Sub_metering_2           0.00
Sub_metering_3           1.25
dtype: float64
# Drop the rows with NaN values, since they make up only 1.25% of the data
df = df.dropna()
# Change the columns to float:
df.iloc[:, 2:8] = df.iloc[:, 2:8].astype(float)
# Or:
# df.columns
# df[['Global_active_power', 'Global_reactive_power',
# 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2',
# 'Sub_metering_3']] = df[['Global_active_power', 'Global_reactive_power',
# 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2',
# 'Sub_metering_3']].astype(float)
# Create a datetime column (the explicit format keeps the dd/mm/yyyy dates unambiguous)
df['Date_time'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%d/%m/%Y %H:%M:%S')
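A quick illustration (not part of the original notebook) of why the explicit format matters: for dates where the day is 12 or less, the default parser reads dd/mm values month-first.
# Illustrative only: the default parser treats '08/01/2007' as August 1st,
# while the explicit day-first format reads it as January 8th, matching the raw file
print(pd.to_datetime('08/01/2007 00:00:00'))                              # 2007-08-01 00:00:00
print(pd.to_datetime('08/01/2007 00:00:00', format='%d/%m/%Y %H:%M:%S'))  # 2007-01-08 00:00:00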
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2049280 entries, 0 to 2075258
Data columns (total 10 columns):
 #   Column                 Dtype
---  ------                 -----
 0   Date                   object
 1   Time                   object
 2   Global_active_power    float64
 3   Global_reactive_power  float64
 4   Voltage                float64
 5   Global_intensity       float64
 6   Sub_metering_1         float64
 7   Sub_metering_2         float64
 8   Sub_metering_3         float64
 9   Date_time              datetime64[ns]
dtypes: datetime64[ns](1), float64(7), object(2)
memory usage: 172.0+ MB
# New features:
df['Year'] = df['Date_time'].dt.year
df['Month'] = df['Date_time'].dt.month
df['DayofWeek'] = df['Date_time'].dt.dayofweek
df['Hour'] = df['Date_time'].dt.hour
# Create the target (per the UCI dataset description): the active energy, in watt-hours per minute,
# consumed by equipment not measured by sub-meters 1-3
df['Power_consumption'] = df['Global_active_power'] * 1000/60 - \
df['Sub_metering_1'] - df['Sub_metering_2'] - df['Sub_metering_3']
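As a quick arithmetic check of this formula (illustrative only), take the values of the first row shown below: 4.216 kW sustained for one minute corresponds to 4.216 * 1000 / 60 ≈ 70.27 Wh, and subtracting the three sub-meter readings (0 + 1 + 17 Wh) leaves ≈ 52.27 Wh of energy not captured by any sub-meter.
# Illustrative check against the first row of the dataframe
print(4.216 * 1000 / 60 - (0 + 1 + 17))   # 52.266666..., matching Power_consumption below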
df
| | Date | Time | Global_active_power | Global_reactive_power | Voltage | Global_intensity | Sub_metering_1 | Sub_metering_2 | Sub_metering_3 | Date_time | Year | Month | DayofWeek | Hour | Power_consumption |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16/12/2006 | 17:24:00 | 4.216 | 0.418 | 234.84 | 18.4 | 0.0 | 1.0 | 17.0 | 2006-12-16 17:24:00 | 2006 | 12 | 5 | 17 | 52.266667 |
| 1 | 16/12/2006 | 17:25:00 | 5.360 | 0.436 | 233.63 | 23.0 | 0.0 | 1.0 | 16.0 | 2006-12-16 17:25:00 | 2006 | 12 | 5 | 17 | 72.333333 |
| 2 | 16/12/2006 | 17:26:00 | 5.374 | 0.498 | 233.29 | 23.0 | 0.0 | 2.0 | 17.0 | 2006-12-16 17:26:00 | 2006 | 12 | 5 | 17 | 70.566667 |
| 3 | 16/12/2006 | 17:27:00 | 5.388 | 0.502 | 233.74 | 23.0 | 0.0 | 1.0 | 17.0 | 2006-12-16 17:27:00 | 2006 | 12 | 5 | 17 | 71.800000 |
| 4 | 16/12/2006 | 17:28:00 | 3.666 | 0.528 | 235.68 | 15.8 | 0.0 | 1.0 | 17.0 | 2006-12-16 17:28:00 | 2006 | 12 | 5 | 17 | 43.100000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2075254 | 26/11/2010 | 20:58:00 | 0.946 | 0.000 | 240.43 | 4.0 | 0.0 | 0.0 | 0.0 | 2010-11-26 20:58:00 | 2010 | 11 | 4 | 20 | 15.766667 |
| 2075255 | 26/11/2010 | 20:59:00 | 0.944 | 0.000 | 240.00 | 4.0 | 0.0 | 0.0 | 0.0 | 2010-11-26 20:59:00 | 2010 | 11 | 4 | 20 | 15.733333 |
| 2075256 | 26/11/2010 | 21:00:00 | 0.938 | 0.000 | 239.82 | 3.8 | 0.0 | 0.0 | 0.0 | 2010-11-26 21:00:00 | 2010 | 11 | 4 | 21 | 15.633333 |
| 2075257 | 26/11/2010 | 21:01:00 | 0.934 | 0.000 | 239.70 | 3.8 | 0.0 | 0.0 | 0.0 | 2010-11-26 21:01:00 | 2010 | 11 | 4 | 21 | 15.566667 |
| 2075258 | 26/11/2010 | 21:02:00 | 0.932 | 0.000 | 239.55 | 3.8 | 0.0 | 0.0 | 0.0 | 2010-11-26 21:02:00 | 2010 | 11 | 4 | 21 | 15.533333 |
2049280 rows × 15 columns
# Drop columns that are not to be used
df = df.drop(columns=['Date', 'Time', 'Global_active_power', 'Global_reactive_power', 'Voltage', 'Global_intensity',
'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3'])
df.describe()
| | Year | Month | DayofWeek | Hour | Power_consumption |
|---|---|---|---|---|---|
| count | 2.049280e+06 | 2.049280e+06 | 2.049280e+06 | 2.049280e+06 | 2.049280e+06 |
| mean | 2.008425e+03 | 6.497968e+00 | 2.992479e+00 | 1.150391e+01 | 9.314693e+00 |
| std | 1.124388e+00 | 3.446016e+00 | 1.996235e+00 | 6.925189e+00 | 9.585916e+00 |
| min | 2.006000e+03 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | -2.400000e+00 |
| 25% | 2.007000e+03 | 4.000000e+00 | 1.000000e+00 | 5.000000e+00 | 3.800000e+00 |
| 50% | 2.008000e+03 | 7.000000e+00 | 3.000000e+00 | 1.200000e+01 | 5.500000e+00 |
| 75% | 2.009000e+03 | 1.000000e+01 | 5.000000e+00 | 1.800000e+01 | 1.036667e+01 |
| max | 2.010000e+03 | 1.200000e+01 | 6.000000e+00 | 2.300000e+01 | 1.248333e+02 |
rows = ['Hour', 'DayofWeek', 'Month', 'Year']
sns.set(font_scale=2)
f, axes = plt.subplots(nrows=4, ncols=1, figsize=(20, 35))
f.suptitle('Power Consumption Distribution', weight='bold', fontsize=20)
for i, row in enumerate(rows):
sns.boxplot(data=df, x=row, y='Power_consumption', ax=axes.flatten()[i], color='red', showfliers = False)
df.corr().style.background_gradient(cmap="coolwarm") # try 'Blues'
| | Year | Month | DayofWeek | Hour | Power_consumption |
|---|---|---|---|---|---|
| Year | 1.000000 | -0.073940 | -0.000441 | -0.000785 | -0.088137 |
| Month | -0.073940 | 1.000000 | 0.000453 | -0.000936 | 0.013474 |
| DayofWeek | -0.000441 | 0.000453 | 1.000000 | -0.000417 | 0.038672 |
| Hour | -0.000785 | -0.000936 | -0.000417 | 1.000000 | 0.294831 |
| Power_consumption | -0.088137 | 0.013474 | 0.038672 | 0.294831 | 1.000000 |
df.corr().loc[df.corr()['Power_consumption'].idxmax()].iloc[1:4].sort_values(ascending = False)
Hour         0.294831
DayofWeek    0.038672
Month        0.013474
Name: Power_consumption, dtype: float64
df.corr().loc[df.corr()['Power_consumption'].idxmax()].iloc[1:4].sort_values(ascending = False).plot(kind='bar', title='Correlation', fontsize=15, figsize=(17, 6));
Based on the previous analysis, four new features (Year, Month, DayofWeek and Hour) were engineered from the date and time, to be used in the machine-learning modelling.
The correlation values show that Hour is the time-related feature most strongly correlated with electric power consumption.
import logging
logging.getLogger().setLevel(logging.CRITICAL)  # Suppress matplotlib info messages about the categorical x-axis (optional)
# Energy distribution
sns.set(font_scale=1.5)
fig, ax = plt.subplots()
sns.distplot(df["Power_consumption"], ax=ax)  # distplot is deprecated in recent seaborn; histplot(..., kde=True) is the replacement
ax.set_xlim(-5, 40)
ax.set_xticks(range(-5,40,2))
plt.title("Energy Distribution")
Text(0.5, 1.0, 'Energy Distribution')
# Trend over months
sns.set(font_scale=1.3)
plt.rcParams.update({'figure.figsize': (17, 3), 'figure.dpi':300})
fig, ax = plt.subplots()
sns.lineplot(data=df.tail(100000), x='Date_time', y='Power_consumption')
plt.grid(linestyle='-', linewidth=0.3)
ax.tick_params(axis='x', rotation=90)
# Trend over days
sns.set(font_scale=1.3)
plt.rcParams.update({'figure.figsize': (17, 3), 'figure.dpi':300})
fig, ax = plt.subplots()
sns.lineplot(data=df.tail(10000), x='Date_time', y='Power_consumption')
plt.grid(linestyle='-', linewidth=0.3)
ax.tick_params(axis='x', rotation=90)
# Trend over hours
sns.set(font_scale=1.3)
plt.rcParams.update({'figure.figsize': (17, 3), 'figure.dpi':300})
fig, ax = plt.subplots()
sns.lineplot(data=df.tail(1000), x='Date_time', y='Power_consumption')
# sns.lineplot(data=df.iloc[-1000:-1], x='Date_time', y='Power_consumption')
plt.grid(linestyle='-', linewidth=0.3)
ax.tick_params(axis='x', rotation=90)
sns.set(font_scale=1.3)
plt.rcParams.update({'figure.figsize': (17, 3), 'figure.dpi':300})
fig, ax = plt.subplots()
sns.lineplot(data=df.iloc[-2000:-1000], x='Date_time', y='Power_consumption')
plt.grid(linestyle='-', linewidth=0.3)
ax.tick_params(axis='x', rotation=90)
sns.set(font_scale=1.3)
plt.rcParams.update({'figure.figsize': (17, 3), 'figure.dpi':300})
fig, ax = plt.subplots()
sns.lineplot(data=df.iloc[-3000:-2000], x='Date_time', y='Power_consumption')
plt.grid(linestyle='-', linewidth=0.3)
ax.tick_params(axis='x', rotation=90)
sns.set(font_scale=1.3)
plt.rcParams.update({'figure.figsize': (17, 3), 'figure.dpi':300})
fig, ax = plt.subplots()
sns.lineplot(data=df.iloc[-4000:-3000], x='Date_time', y='Power_consumption')
plt.grid(linestyle='-', linewidth=0.3)
ax.tick_params(axis='x', rotation=90)
df_new = df.copy()
df_new = df_new.set_index(['Date_time'])
plt.figure(figsize=(17, 3))
plt.plot(df_new.resample('M').mean().values[:, 4])  # column index 4 is Power_consumption
plt.title('Average Monthly Power Consumption')
plt.xlabel("Month Timeline")
plt.ylabel("Power Consumption")
plt.show()
plt.figure(figsize=(17, 3))
plt.plot(df_new.resample('D').mean().values[:, 4])
plt.title('Average Daily Power Consumption')
plt.xlabel("Day Timeline")
plt.ylabel("Power Consumption")
plt.show()
plt.figure(figsize=(17, 3))
plt.plot(df_new.resample('H').mean().values[:, 4])
plt.title('Average Hourly Power Consumption')
plt.xlabel("Hour Timeline")
plt.ylabel("Power Consumption")
plt.show()
# Linear Regression:
features = ['Month', 'Hour', 'DayofWeek']
X = df[features]
y = df['Power_consumption']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('MSE', mean_squared_error(y_test, y_pred))
print('R2', r2_score(y_test, y_pred))
MSE 83.15425764019301
R2 0.08989794995492828
# LR With scaling:
features = ['Month', 'Hour', 'DayofWeek']
X = df[features]
y = df['Power_consumption']
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.30, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('MSE', mean_squared_error(y_test, y_pred))
print('R2', r2_score(y_test, y_pred))
MSE 83.15425764019305
R2 0.08989794995492773
# Decision Tree Regressor (note: tree-based models do not need feature scaling; the scaled features are reused here only for consistency)
features = ['Month', 'Hour', 'DayofWeek']
X = df[features]
y = df['Power_consumption']
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.30, random_state=42)
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('MSE', mean_squared_error(y_test, y_pred))
print('R2', r2_score(y_test, y_pred))
MSE 65.41834423107447
R2 0.2840129791925724
# XGBoost Regressor:
features = ['Month', 'Hour', 'DayofWeek']
X = df[features]
y = df['Power_consumption']
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.30, random_state=42)
model = XGBRegressor(n_estimators=500, learning_rate=0.01)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('MSE', mean_squared_error(y_test, y_pred))
print('R2', r2_score(y_test, y_pred))
MSE 66.26210471567353
R2 0.27477823681650126
# XGBRegressor cross-validate
from sklearn.model_selection import cross_validate
xgb_scores = cross_validate(model, X, y, cv=5, scoring=['neg_mean_squared_error', 'r2'], n_jobs=4)
print('XGB MSE=', xgb_scores['test_neg_mean_squared_error'].mean()*-1, ',\tXGB R2 =', xgb_scores['test_r2'].mean())
XGB MSE= 69.81654421583985 , XGB R2 = 0.22773259603665133
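The shuffled cross-validation above mixes past and future observations, which is optimistic for time-series data. Below is a minimal sketch (not part of the original notebook) of a time-ordered alternative using scikit-learn's TimeSeriesSplit, assuming X and y from the previous cell and the dataframe's original chronological ordering; the cells that follow take a related approach by holding out the most recent weeks as a test set.
from sklearn.model_selection import TimeSeriesSplit

# Expanding-window CV: each fold trains only on observations that precede its validation block
tscv = TimeSeriesSplit(n_splits=5)
for fold, (train_idx, val_idx) in enumerate(tscv.split(X)):
    m = XGBRegressor(n_estimators=500, learning_rate=0.01)
    m.fit(X.iloc[train_idx], y.iloc[train_idx])
    pred = m.predict(X.iloc[val_idx])
    print(f'Fold {fold}: MSE={mean_squared_error(y.iloc[val_idx], pred):.2f}, '
          f'R2={r2_score(y.iloc[val_idx], pred):.2f}')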
df['Date_time'].max()
Timestamp('2010-12-11 23:59:00')
# Chronological split: train on 2010-05-01 up to 2010-11-08, test on everything after
train_df = df[(df['Date_time'] < '2010-11-08') & (df['Date_time'] >= '2010-05-01')]
test_df = df[df['Date_time'] >= '2010-11-08']
X_train, y_train = train_df[features], train_df['Power_consumption']
X_test, y_test = test_df[features], test_df['Power_consumption']
model = XGBRegressor(n_estimators=500, learning_rate=0.01)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('MSE', mean_squared_error(y_test, y_pred))
print('R2', r2_score(y_test, y_pred))
MSE 59.24861468573801
R2 0.15810422357413334
test_df = test_df.reset_index(drop=True)
test_df['predictions'] = pd.Series(y_pred)
sns.set(font_scale=1.4)
plt.rcParams.update({'figure.figsize': (17, 3), 'figure.dpi':300})
fig, ax = plt.subplots()
sns.lineplot(data=df.tail(10000), x='Date_time', y='Power_consumption')
sns.lineplot(data=test_df, x='Date_time', y='predictions')
plt.grid(linestyle='-', linewidth=0.3)
ax.tick_params(axis='x', rotation=90)
sns.set(font_scale=1.4)
plt.figure(figsize=(16,8))
plt.title('XGB Regressor Prediction')
plt.xlabel('Time Steps')
plt.ylabel('Power Consumption')
plt.plot(test_df[['Power_consumption', 'predictions']])
plt.legend(['Actual', 'Predictions'], loc='lower right')
plt.show()
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
df_resample = df_new.resample('h').mean()
df_resample.shape
(34951, 5)
df_resample
| | Year | Month | DayofWeek | Hour | Power_consumption |
|---|---|---|---|---|---|
| Date_time | | | | | |
| 2006-12-16 17:00:00 | 2006.0 | 12.0 | 5.0 | 17.0 | 52.992593 |
| 2006-12-16 18:00:00 | 2006.0 | 12.0 | 5.0 | 18.0 | 36.953333 |
| 2006-12-16 19:00:00 | 2006.0 | 12.0 | 5.0 | 19.0 | 38.553889 |
| 2006-12-16 20:00:00 | 2006.0 | 12.0 | 5.0 | 20.0 | 37.692778 |
| 2006-12-16 21:00:00 | 2006.0 | 12.0 | 5.0 | 21.0 | 33.307778 |
| ... | ... | ... | ... | ... | ... |
| 2010-12-11 19:00:00 | 2010.0 | 12.0 | 5.0 | 19.0 | 26.382778 |
| 2010-12-11 20:00:00 | 2010.0 | 12.0 | 5.0 | 20.0 | 10.005556 |
| 2010-12-11 21:00:00 | 2010.0 | 12.0 | 5.0 | 21.0 | 9.581667 |
| 2010-12-11 22:00:00 | 2010.0 | 12.0 | 5.0 | 22.0 | 5.342222 |
| 2010-12-11 23:00:00 | 2010.0 | 12.0 | 5.0 | 23.0 | 10.664444 |
34951 rows × 5 columns
df_resample = df_resample.drop(columns=['Year', 'Month', 'DayofWeek', 'Hour'])
df_resample
| | Power_consumption |
|---|---|
| Date_time | |
| 2006-12-16 17:00:00 | 52.992593 |
| 2006-12-16 18:00:00 | 36.953333 |
| 2006-12-16 19:00:00 | 38.553889 |
| 2006-12-16 20:00:00 | 37.692778 |
| 2006-12-16 21:00:00 | 33.307778 |
| ... | ... |
| 2010-12-11 19:00:00 | 26.382778 |
| 2010-12-11 20:00:00 | 10.005556 |
| 2010-12-11 21:00:00 | 9.581667 |
| 2010-12-11 22:00:00 | 5.342222 |
| 2010-12-11 23:00:00 | 10.664444 |
34951 rows × 1 columns
# The function is from 'Deep Learning for Time Series Forecasting', Jason Brownlee
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
"""
Frame a time series as a supervised learning dataset.
Arguments:
data: Sequence of observations as a list or NumPy array.
n_in: Number of lag observations as input (X).
n_out: Number of observations as output (y).
dropnan: Boolean whether or not to drop rows with NaN values.
Returns:
Pandas DataFrame of series framed for supervised learning.
"""
n_vars = 1 if type(data) is list else data.shape[1]
df = pd.DataFrame(data)
cols, names = list(), list()
# input sequence (t-n, ... t-1)
for i in range(n_in, 0, -1):
cols.append(df.shift(i))
names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
# forecast sequence (t, t+1, ... t+n)
for i in range(0, n_out):
cols.append(df.shift(-i))
if i == 0:
names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
else:
names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
# put it all together
agg = pd.concat(cols, axis=1)
agg.columns = names
# drop rows with NaN values
if dropnan:
agg.dropna(inplace=True)
return agg
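A small toy example (illustrative only) of what this framing produces: with two lags and one output step, every row pairs the two previous observations with the current one.
# Toy check of series_to_supervised on a 5-value series with n_in=2, n_out=1
toy = pd.DataFrame({'value': [10, 20, 30, 40, 50]})
print(series_to_supervised(toy, n_in=2, n_out=1))
#    var1(t-2)  var1(t-1)  var1(t)
# 2       10.0       20.0       30
# 3       20.0       30.0       40
# 4       30.0       40.0       50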
# Scaling the power consumption values
values = df_resample.values
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
scaled
array([[0.81334629],
[0.56059147],
[0.58581384],
...,
[0.1292548 ],
[0.06244747],
[0.14631776]])
# Creating a supervised dataframe
reframed = series_to_supervised(scaled, 1, 1)
reframed
| | var1(t-1) | var1(t) |
|---|---|---|
| 1 | 0.813346 | 0.560591 |
| 2 | 0.560591 | 0.585814 |
| 3 | 0.585814 | 0.572244 |
| 4 | 0.572244 | 0.503143 |
| 5 | 0.503143 | 0.484145 |
| ... | ... | ... |
| 34946 | 0.577313 | 0.394015 |
| 34947 | 0.394015 | 0.135935 |
| 34948 | 0.135935 | 0.129255 |
| 34949 | 0.129255 | 0.062447 |
| 34950 | 0.062447 | 0.146318 |
34147 rows × 2 columns
values = reframed.values
values
array([[0.81334629, 0.56059147],
[0.56059147, 0.58581384],
[0.58581384, 0.57224401],
...,
[0.13593465, 0.1292548 ],
[0.1292548 , 0.06244747],
[0.06244747, 0.14631776]])
# Create the train and test data. Only the first 4,000 data points are used as the training set.
n_train_time = 4000
train = values[:n_train_time, :]
test = values[n_train_time:, :]
train_x, train_y = train[:, :-1], train[:, -1]
test_x, test_y = test[:, :-1], test[:, -1]
train_x = train_x.reshape((train_x.shape[0], 1, train_x.shape[1]))
test_x = test_x.reshape((test_x.shape[0], 1, test_x.shape[1]))
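Keras LSTM layers expect 3-D input shaped [samples, timesteps, features]; a quick shape check (illustrative) confirms that each sample here carries a single timestep with a single feature.
# Sanity check of the LSTM input shapes: [samples, timesteps, features]
print(train_x.shape, train_y.shape)   # (4000, 1, 1) (4000,)
print(test_x.shape, test_y.shape)     # (30147, 1, 1) (30147,)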
model = Sequential()
model.add(LSTM(100, input_shape=(train_x.shape[1], train_x.shape[2])))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(train_x, train_y, epochs=50, batch_size=70, validation_data=(test_x, test_y), verbose=2, shuffle=False)
Epoch 1/50 58/58 - 3s - loss: 0.0346 - val_loss: 0.0120 - 3s/epoch - 60ms/step Epoch 2/50 58/58 - 1s - loss: 0.0206 - val_loss: 0.0101 - 858ms/epoch - 15ms/step Epoch 3/50 58/58 - 1s - loss: 0.0171 - val_loss: 0.0085 - 852ms/epoch - 15ms/step Epoch 4/50 58/58 - 1s - loss: 0.0139 - val_loss: 0.0074 - 886ms/epoch - 15ms/step Epoch 5/50 58/58 - 1s - loss: 0.0118 - val_loss: 0.0069 - 853ms/epoch - 15ms/step Epoch 6/50 58/58 - 1s - loss: 0.0108 - val_loss: 0.0068 - 855ms/epoch - 15ms/step Epoch 7/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 846ms/epoch - 15ms/step Epoch 8/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 850ms/epoch - 15ms/step Epoch 9/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 863ms/epoch - 15ms/step Epoch 10/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 861ms/epoch - 15ms/step Epoch 11/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 871ms/epoch - 15ms/step Epoch 12/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 889ms/epoch - 15ms/step Epoch 13/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 851ms/epoch - 15ms/step Epoch 14/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 845ms/epoch - 15ms/step Epoch 15/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 856ms/epoch - 15ms/step Epoch 16/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 855ms/epoch - 15ms/step Epoch 17/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 839ms/epoch - 14ms/step Epoch 18/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 848ms/epoch - 15ms/step Epoch 19/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 847ms/epoch - 15ms/step Epoch 20/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 862ms/epoch - 15ms/step Epoch 21/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 855ms/epoch - 15ms/step Epoch 22/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 852ms/epoch - 15ms/step Epoch 23/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 854ms/epoch - 15ms/step Epoch 24/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 856ms/epoch - 15ms/step Epoch 25/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 849ms/epoch - 15ms/step Epoch 26/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 851ms/epoch - 15ms/step Epoch 27/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 865ms/epoch - 15ms/step Epoch 28/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 858ms/epoch - 15ms/step Epoch 29/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 869ms/epoch - 15ms/step Epoch 30/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 852ms/epoch - 15ms/step Epoch 31/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 855ms/epoch - 15ms/step Epoch 32/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 876ms/epoch - 15ms/step Epoch 33/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 854ms/epoch - 15ms/step Epoch 34/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 934ms/epoch - 16ms/step Epoch 35/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 978ms/epoch - 17ms/step Epoch 36/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 870ms/epoch - 15ms/step Epoch 37/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 860ms/epoch - 15ms/step Epoch 38/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 892ms/epoch - 15ms/step Epoch 39/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 898ms/epoch - 15ms/step Epoch 40/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 859ms/epoch - 15ms/step Epoch 41/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 849ms/epoch - 15ms/step Epoch 42/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 851ms/epoch - 15ms/step Epoch 43/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 963ms/epoch - 
17ms/step Epoch 44/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 902ms/epoch - 16ms/step Epoch 45/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 894ms/epoch - 15ms/step Epoch 46/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 863ms/epoch - 15ms/step Epoch 47/50 58/58 - 1s - loss: 0.0104 - val_loss: 0.0068 - 862ms/epoch - 15ms/step Epoch 48/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 856ms/epoch - 15ms/step Epoch 49/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 915ms/epoch - 16ms/step Epoch 50/50 58/58 - 1s - loss: 0.0103 - val_loss: 0.0068 - 869ms/epoch - 15ms/step
# Plot loss history
sns.set(font_scale=1.4)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
# Prediction on the test set
yhat = model.predict(test_x)
# Invert the MinMax scaling so both predictions and actuals are back in the original units
# (with a single feature, the concatenation trick from the multivariate template is unnecessary)
inv_yhat = scaler.inverse_transform(yhat)[:, 0]
inv_y = scaler.inverse_transform(test_y.reshape(-1, 1))[:, 0]
# calculate RMSE
rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
943/943 [==============================] - 2s 2ms/step
Test RMSE: 5.225
# Visualization of the predicted vs. actual values for the first 500 time steps
sns.set(font_scale=2)
aa=[x for x in range(500)]
plt.figure(figsize=(25,10))
plt.plot(aa, inv_y[:500], marker='.', label="Actual")
plt.plot(aa, inv_yhat[:500], 'r', label="Prediction")
plt.ylabel(df.columns[5], size=25)
plt.xlabel('Time step for first 500 hours', size=25)
plt.legend(fontsize=20)
plt.show()
# Visualization of the predicted vs. actual values for time steps 20,000 to 21,000
sns.set(font_scale=2)
aa=[x for x in range(1000)]
plt.figure(figsize=(25,10))
plt.plot(aa, inv_y[20000:21000], marker='.', label="Actual")
plt.plot(aa, inv_yhat[20000:21000], 'r', label="Prediction")
plt.ylabel(df_resample.columns[0], size=25)
plt.xlabel('Time step for 1000 hours from 20,000 to 21,000', size=25)
plt.legend(fontsize=20)
plt.show()
reframed_5_1 = series_to_supervised(df_resample, 5, 1)
reframed_5_1
| | var1(t-5) | var1(t-4) | var1(t-3) | var1(t-2) | var1(t-1) | var1(t) |
|---|---|---|---|---|---|---|
| Date_time | | | | | | |
| 2006-12-16 22:00:00 | 52.992593 | 36.953333 | 38.553889 | 37.692778 | 33.307778 | 32.102222 |
| 2006-12-16 23:00:00 | 36.953333 | 38.553889 | 37.692778 | 33.307778 | 32.102222 | 34.276667 |
| 2006-12-17 00:00:00 | 38.553889 | 37.692778 | 33.307778 | 32.102222 | 34.276667 | 30.907778 |
| 2006-12-17 01:00:00 | 37.692778 | 33.307778 | 32.102222 | 34.276667 | 30.907778 | 30.590000 |
| 2006-12-17 02:00:00 | 33.307778 | 32.102222 | 34.276667 | 30.907778 | 30.590000 | 25.887778 |
| ... | ... | ... | ... | ... | ... | ... |
| 2010-12-11 19:00:00 | 15.265556 | 14.829444 | 19.644444 | 30.851667 | 38.014444 | 26.382778 |
| 2010-12-11 20:00:00 | 14.829444 | 19.644444 | 30.851667 | 38.014444 | 26.382778 | 10.005556 |
| 2010-12-11 21:00:00 | 19.644444 | 30.851667 | 38.014444 | 26.382778 | 10.005556 | 9.581667 |
| 2010-12-11 22:00:00 | 30.851667 | 38.014444 | 26.382778 | 10.005556 | 9.581667 | 5.342222 |
| 2010-12-11 23:00:00 | 38.014444 | 26.382778 | 10.005556 | 9.581667 | 5.342222 | 10.664444 |
34063 rows × 6 columns
# Create the train and test data. The first 27,000 data points are the training set.
values = reframed_5_1.values
n_train_time = 27000
train = values[:n_train_time, :]
test = values[n_train_time:, :]
train_x, train_y = train[:, :-1], train[:, -1]
test_x, test_y = test[:, :-1], test[:, -1]
train_x = train_x.reshape((train_x.shape[0], train_x.shape[1], 1))
test_x = test_x.reshape((test_x.shape[0], test_x.shape[1], 1))
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(train_x.shape[1], 1)))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(train_x, train_y, epochs=50, batch_size=70, validation_data=(test_x, test_y), verbose=2, shuffle=False)
Epoch 1/50 386/386 - 3s - loss: 52.9758 - val_loss: 18.1406 - 3s/epoch - 8ms/step Epoch 2/50 386/386 - 2s - loss: 31.7021 - val_loss: 17.7105 - 2s/epoch - 5ms/step Epoch 3/50 386/386 - 2s - loss: 31.3429 - val_loss: 17.7060 - 2s/epoch - 6ms/step Epoch 4/50 386/386 - 2s - loss: 30.6038 - val_loss: 17.6692 - 2s/epoch - 5ms/step Epoch 5/50 386/386 - 2s - loss: 30.8964 - val_loss: 17.7510 - 2s/epoch - 6ms/step Epoch 6/50 386/386 - 2s - loss: 30.8660 - val_loss: 17.5917 - 2s/epoch - 5ms/step Epoch 7/50 386/386 - 2s - loss: 30.5576 - val_loss: 17.7499 - 2s/epoch - 5ms/step Epoch 8/50 386/386 - 2s - loss: 30.3992 - val_loss: 17.6264 - 2s/epoch - 4ms/step Epoch 9/50 386/386 - 2s - loss: 30.6513 - val_loss: 17.6461 - 2s/epoch - 4ms/step Epoch 10/50 386/386 - 2s - loss: 30.1873 - val_loss: 17.6977 - 2s/epoch - 4ms/step Epoch 11/50 386/386 - 2s - loss: 30.0631 - val_loss: 17.5348 - 2s/epoch - 4ms/step Epoch 12/50 386/386 - 2s - loss: 30.0729 - val_loss: 17.5196 - 2s/epoch - 4ms/step Epoch 13/50 386/386 - 2s - loss: 29.8715 - val_loss: 17.5425 - 2s/epoch - 4ms/step Epoch 14/50 386/386 - 2s - loss: 29.9686 - val_loss: 17.5045 - 2s/epoch - 4ms/step Epoch 15/50 386/386 - 2s - loss: 29.9537 - val_loss: 17.4187 - 2s/epoch - 4ms/step Epoch 16/50 386/386 - 2s - loss: 29.7113 - val_loss: 17.4675 - 2s/epoch - 5ms/step Epoch 17/50 386/386 - 2s - loss: 29.6321 - val_loss: 17.3978 - 2s/epoch - 4ms/step Epoch 18/50 386/386 - 2s - loss: 29.5271 - val_loss: 17.5491 - 2s/epoch - 4ms/step Epoch 19/50 386/386 - 2s - loss: 29.4646 - val_loss: 17.5143 - 2s/epoch - 5ms/step Epoch 20/50 386/386 - 2s - loss: 29.2731 - val_loss: 17.5762 - 2s/epoch - 5ms/step Epoch 21/50 386/386 - 2s - loss: 29.2838 - val_loss: 17.3713 - 2s/epoch - 4ms/step Epoch 22/50 386/386 - 2s - loss: 29.6158 - val_loss: 17.4821 - 2s/epoch - 4ms/step Epoch 23/50 386/386 - 2s - loss: 29.3818 - val_loss: 17.4693 - 2s/epoch - 4ms/step Epoch 24/50 386/386 - 2s - loss: 29.1837 - val_loss: 17.6105 - 2s/epoch - 4ms/step Epoch 25/50 386/386 - 2s - loss: 29.2670 - val_loss: 17.3797 - 2s/epoch - 4ms/step Epoch 26/50 386/386 - 2s - loss: 29.1176 - val_loss: 17.5412 - 2s/epoch - 4ms/step Epoch 27/50 386/386 - 2s - loss: 29.1205 - val_loss: 17.4800 - 2s/epoch - 4ms/step Epoch 28/50 386/386 - 2s - loss: 28.9021 - val_loss: 17.5185 - 2s/epoch - 4ms/step Epoch 29/50 386/386 - 2s - loss: 28.9652 - val_loss: 17.5711 - 2s/epoch - 4ms/step Epoch 30/50 386/386 - 2s - loss: 28.8372 - val_loss: 17.4866 - 2s/epoch - 5ms/step Epoch 31/50 386/386 - 2s - loss: 29.0232 - val_loss: 17.6104 - 2s/epoch - 5ms/step Epoch 32/50 386/386 - 2s - loss: 28.8579 - val_loss: 17.5299 - 2s/epoch - 5ms/step Epoch 33/50 386/386 - 2s - loss: 28.6998 - val_loss: 17.4128 - 2s/epoch - 4ms/step Epoch 34/50 386/386 - 2s - loss: 28.8420 - val_loss: 17.5600 - 2s/epoch - 4ms/step Epoch 35/50 386/386 - 2s - loss: 28.8865 - val_loss: 17.5830 - 2s/epoch - 5ms/step Epoch 36/50 386/386 - 2s - loss: 28.8701 - val_loss: 17.4228 - 2s/epoch - 5ms/step Epoch 37/50 386/386 - 2s - loss: 28.6083 - val_loss: 17.3527 - 2s/epoch - 4ms/step Epoch 38/50 386/386 - 2s - loss: 28.7827 - val_loss: 17.4708 - 2s/epoch - 4ms/step Epoch 39/50 386/386 - 2s - loss: 28.8361 - val_loss: 17.3816 - 2s/epoch - 4ms/step Epoch 40/50 386/386 - 2s - loss: 28.5401 - val_loss: 17.4312 - 2s/epoch - 4ms/step Epoch 41/50 386/386 - 2s - loss: 28.4969 - val_loss: 17.5387 - 2s/epoch - 5ms/step Epoch 42/50 386/386 - 2s - loss: 28.6778 - val_loss: 17.3358 - 2s/epoch - 4ms/step Epoch 43/50 386/386 - 2s - loss: 28.4774 - val_loss: 17.3425 - 2s/epoch - 
4ms/step Epoch 44/50 386/386 - 2s - loss: 28.5465 - val_loss: 17.4052 - 2s/epoch - 5ms/step Epoch 45/50 386/386 - 2s - loss: 28.3912 - val_loss: 17.3261 - 2s/epoch - 5ms/step Epoch 46/50 386/386 - 2s - loss: 28.1596 - val_loss: 17.3878 - 2s/epoch - 5ms/step Epoch 47/50 386/386 - 2s - loss: 28.2990 - val_loss: 17.4403 - 2s/epoch - 4ms/step Epoch 48/50 386/386 - 2s - loss: 28.4316 - val_loss: 17.3925 - 2s/epoch - 5ms/step Epoch 49/50 386/386 - 2s - loss: 28.1209 - val_loss: 17.4082 - 2s/epoch - 5ms/step Epoch 50/50 386/386 - 2s - loss: 28.4401 - val_loss: 17.3857 - 2s/epoch - 5ms/step
# Plot loss history
sns.set(font_scale=1.4)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
# Prediction on the test set
yhat = model.predict(test_x)
yhat
221/221 [==============================] - 1s 2ms/step
array([[6.1268864],
[6.944149 ],
[6.3943896],
...,
[6.686078 ],
[7.970878 ],
[6.1730075]], dtype=float32)
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(test_y, yhat))
print('Test RMSE: %.3f' % rmse)
Test RMSE: 4.170
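For context on this RMSE, a naive last-value baseline (a sketch, not part of the original notebook) predicts each hour with the previous hour's consumption, i.e. the var1(t-1) column of the supervised frame built above:
# Hypothetical baseline: predict y(t) with y(t-1) on the same test rows
naive_pred = test[:, -2]   # the var1(t-1) column of reframed_5_1
naive_rmse = np.sqrt(mean_squared_error(test[:, -1], naive_pred))
print('Naive last-value RMSE: %.3f' % naive_rmse)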
# Visualization of the predicted vs. actual values for the first 500 time steps
sns.set(font_scale=2)
aa=[x for x in range(500)]
plt.figure(figsize=(25,10))
plt.plot(aa, test_y[:500], marker='.', label="Actual")
plt.plot(aa, yhat[:500], 'r', label="Prediction")
plt.ylabel(df_resample.columns[0], size=25)
plt.xlabel('Time step for first 500 hours', size=25)
plt.legend(fontsize=20)
plt.show()
# Visualization of the predicted vs. actual values for time steps 5,000 to 6,000
sns.set(font_scale=2)
aa=[x for x in range(1000)]
plt.figure(figsize=(25,10))
plt.plot(aa, test_y[5000:6000], marker='.', label="Actual")
plt.plot(aa, yhat[5000:6000], 'r', label="Prediction")
plt.ylabel(df_resample.columns[0], size=25)
plt.xlabel('Time step for 1000 hours from 5,000 to 6,000', size=25)
plt.legend(fontsize=20)
plt.show()
from tensorflow.keras.layers import RepeatVector, TimeDistributed
reframed_5_3 = series_to_supervised(df_resample, 5, 3)
reframed_5_3
| | var1(t-5) | var1(t-4) | var1(t-3) | var1(t-2) | var1(t-1) | var1(t) | var1(t+1) | var1(t+2) |
|---|---|---|---|---|---|---|---|---|
| Date_time | | | | | | | | |
| 2006-12-16 22:00:00 | 52.992593 | 36.953333 | 38.553889 | 37.692778 | 33.307778 | 32.102222 | 34.276667 | 30.907778 |
| 2006-12-16 23:00:00 | 36.953333 | 38.553889 | 37.692778 | 33.307778 | 32.102222 | 34.276667 | 30.907778 | 30.590000 |
| 2006-12-17 00:00:00 | 38.553889 | 37.692778 | 33.307778 | 32.102222 | 34.276667 | 30.907778 | 30.590000 | 25.887778 |
| 2006-12-17 01:00:00 | 37.692778 | 33.307778 | 32.102222 | 34.276667 | 30.907778 | 30.590000 | 25.887778 | 26.936667 |
| 2006-12-17 02:00:00 | 33.307778 | 32.102222 | 34.276667 | 30.907778 | 30.590000 | 25.887778 | 26.936667 | 27.479444 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2010-12-11 17:00:00 | 8.465556 | 13.363889 | 15.265556 | 14.829444 | 19.644444 | 30.851667 | 38.014444 | 26.382778 |
| 2010-12-11 18:00:00 | 13.363889 | 15.265556 | 14.829444 | 19.644444 | 30.851667 | 38.014444 | 26.382778 | 10.005556 |
| 2010-12-11 19:00:00 | 15.265556 | 14.829444 | 19.644444 | 30.851667 | 38.014444 | 26.382778 | 10.005556 | 9.581667 |
| 2010-12-11 20:00:00 | 14.829444 | 19.644444 | 30.851667 | 38.014444 | 26.382778 | 10.005556 | 9.581667 | 5.342222 |
| 2010-12-11 21:00:00 | 19.644444 | 30.851667 | 38.014444 | 26.382778 | 10.005556 | 9.581667 | 5.342222 | 10.664444 |
34021 rows × 8 columns
# Create the train and test data. The first 27,000 data points are the training set.
values = reframed_5_3.values
n_train_time = 27000
train = values[:n_train_time, :]
test = values[n_train_time:, :]
train_x, train_y = train[:, :-3], train[:, -3:]
test_x, test_y = test[:, :-3], test[:, -3:]
train_x = train_x.reshape((train_x.shape[0], train_x.shape[1], 1))
test_x = test_x.reshape((test_x.shape[0], test_x.shape[1], 1))
model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(train_x.shape[1], 1)))
model.add(Dropout(0.1))
model.add(RepeatVector(3))
model.add(TimeDistributed(Dense(1)))
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(train_x, train_y, epochs=50, batch_size=70, validation_data=(test_x, test_y), verbose=2, shuffle=False)
Epoch 1/50 386/386 - 4s - loss: 64.5813 - val_loss: 30.4759 - 4s/epoch - 10ms/step Epoch 2/50 386/386 - 3s - loss: 51.3051 - val_loss: 29.8828 - 3s/epoch - 7ms/step Epoch 3/50 386/386 - 3s - loss: 50.5956 - val_loss: 29.9563 - 3s/epoch - 7ms/step Epoch 4/50 386/386 - 3s - loss: 50.6291 - val_loss: 29.9837 - 3s/epoch - 7ms/step Epoch 5/50 386/386 - 3s - loss: 50.0592 - val_loss: 30.0363 - 3s/epoch - 7ms/step Epoch 6/50 386/386 - 3s - loss: 49.9893 - val_loss: 29.8490 - 3s/epoch - 7ms/step Epoch 7/50 386/386 - 3s - loss: 49.7269 - val_loss: 29.9512 - 3s/epoch - 7ms/step Epoch 8/50 386/386 - 3s - loss: 49.8269 - val_loss: 29.6274 - 3s/epoch - 7ms/step Epoch 9/50 386/386 - 3s - loss: 49.4829 - val_loss: 30.3837 - 3s/epoch - 7ms/step Epoch 10/50 386/386 - 3s - loss: 49.4948 - val_loss: 29.9021 - 3s/epoch - 7ms/step Epoch 11/50 386/386 - 3s - loss: 49.2232 - val_loss: 29.8253 - 3s/epoch - 7ms/step Epoch 12/50 386/386 - 3s - loss: 49.4945 - val_loss: 29.8634 - 3s/epoch - 7ms/step Epoch 13/50 386/386 - 3s - loss: 49.2657 - val_loss: 29.6597 - 3s/epoch - 7ms/step Epoch 14/50 386/386 - 3s - loss: 49.0337 - val_loss: 29.8536 - 3s/epoch - 7ms/step Epoch 15/50 386/386 - 3s - loss: 48.8907 - val_loss: 29.6848 - 3s/epoch - 7ms/step Epoch 16/50 386/386 - 3s - loss: 48.9295 - val_loss: 29.7518 - 3s/epoch - 7ms/step Epoch 17/50 386/386 - 3s - loss: 48.7481 - val_loss: 29.8555 - 3s/epoch - 7ms/step Epoch 18/50 386/386 - 3s - loss: 48.7502 - val_loss: 29.6946 - 3s/epoch - 7ms/step Epoch 19/50 386/386 - 3s - loss: 48.5205 - val_loss: 29.9474 - 3s/epoch - 7ms/step Epoch 20/50 386/386 - 3s - loss: 48.4353 - val_loss: 29.8919 - 3s/epoch - 7ms/step Epoch 21/50 386/386 - 3s - loss: 48.4882 - val_loss: 30.0552 - 3s/epoch - 7ms/step Epoch 22/50 386/386 - 3s - loss: 48.3474 - val_loss: 29.8229 - 3s/epoch - 7ms/step Epoch 23/50 386/386 - 3s - loss: 48.2786 - val_loss: 29.9836 - 3s/epoch - 7ms/step Epoch 24/50 386/386 - 3s - loss: 48.3545 - val_loss: 30.0279 - 3s/epoch - 7ms/step Epoch 25/50 386/386 - 3s - loss: 48.4842 - val_loss: 29.9181 - 3s/epoch - 7ms/step Epoch 26/50 386/386 - 3s - loss: 48.1190 - val_loss: 30.1621 - 3s/epoch - 7ms/step Epoch 27/50 386/386 - 3s - loss: 48.1971 - val_loss: 30.1749 - 3s/epoch - 7ms/step Epoch 28/50 386/386 - 3s - loss: 48.1026 - val_loss: 30.1115 - 3s/epoch - 7ms/step Epoch 29/50 386/386 - 3s - loss: 48.1456 - val_loss: 30.1076 - 3s/epoch - 7ms/step Epoch 30/50 386/386 - 3s - loss: 48.2134 - val_loss: 30.1627 - 3s/epoch - 7ms/step Epoch 31/50 386/386 - 3s - loss: 48.1449 - val_loss: 30.2724 - 3s/epoch - 7ms/step Epoch 32/50 386/386 - 3s - loss: 48.1033 - val_loss: 30.3625 - 3s/epoch - 7ms/step Epoch 33/50 386/386 - 3s - loss: 47.8992 - val_loss: 30.3968 - 3s/epoch - 7ms/step Epoch 34/50 386/386 - 3s - loss: 47.9351 - val_loss: 30.3183 - 3s/epoch - 7ms/step Epoch 35/50 386/386 - 2s - loss: 48.0576 - val_loss: 30.3954 - 2s/epoch - 6ms/step Epoch 36/50 386/386 - 3s - loss: 47.6862 - val_loss: 30.1340 - 3s/epoch - 7ms/step Epoch 37/50 386/386 - 3s - loss: 47.9299 - val_loss: 30.3430 - 3s/epoch - 7ms/step Epoch 38/50 386/386 - 3s - loss: 47.8006 - val_loss: 30.3326 - 3s/epoch - 7ms/step Epoch 39/50 386/386 - 3s - loss: 47.7759 - val_loss: 30.5742 - 3s/epoch - 6ms/step Epoch 40/50 386/386 - 2s - loss: 47.6604 - val_loss: 30.3166 - 2s/epoch - 6ms/step Epoch 41/50 386/386 - 2s - loss: 47.7362 - val_loss: 30.4682 - 2s/epoch - 6ms/step Epoch 42/50 386/386 - 2s - loss: 47.7278 - val_loss: 30.5262 - 2s/epoch - 6ms/step Epoch 43/50 386/386 - 2s - loss: 47.5757 - val_loss: 30.3440 - 2s/epoch - 
6ms/step Epoch 44/50 386/386 - 2s - loss: 47.6410 - val_loss: 30.3502 - 2s/epoch - 6ms/step Epoch 45/50 386/386 - 3s - loss: 47.4610 - val_loss: 30.6021 - 3s/epoch - 7ms/step Epoch 46/50 386/386 - 2s - loss: 47.4568 - val_loss: 30.4077 - 2s/epoch - 6ms/step Epoch 47/50 386/386 - 2s - loss: 47.4071 - val_loss: 30.3112 - 2s/epoch - 6ms/step Epoch 48/50 386/386 - 2s - loss: 47.6010 - val_loss: 30.4805 - 2s/epoch - 6ms/step Epoch 49/50 386/386 - 3s - loss: 47.4967 - val_loss: 30.4040 - 3s/epoch - 7ms/step Epoch 50/50 386/386 - 2s - loss: 47.2649 - val_loss: 30.5488 - 2s/epoch - 6ms/step
# Plot loss history
sns.set(font_scale=1.4)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
# Prediction test
yhat = model.predict(test_x)
yhat
220/220 [==============================] - 1s 3ms/step
array([[[ 8.453146],
[ 8.453146],
[ 8.453146]],
[[ 9.400521],
[ 9.400521],
[ 9.400521]],
[[17.468319],
[17.468319],
[17.468319]],
...,
[[26.181952],
[26.181952],
[26.181952]],
[[13.933351],
[13.933351],
[13.933351]],
[[ 7.099724],
[ 7.099724],
[ 7.099724]]], dtype=float32)
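Note that every predicted triple above repeats the same value three times: RepeatVector hands an identical copy of the encoded vector to each output step, and a shared Dense layer alone cannot distinguish the steps. A common remedy, sketched below but not trained here, is to insert a decoder LSTM with return_sequences=True between RepeatVector and the TimeDistributed Dense.
# Sketch of an encoder-decoder variant (assumption: not part of the original run)
model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(train_x.shape[1], 1)))  # encoder
model.add(RepeatVector(3))                                                  # one copy per forecast step
model.add(LSTM(50, activation='relu', return_sequences=True))               # decoder: one state per step
model.add(TimeDistributed(Dense(1)))
model.compile(loss='mean_squared_error', optimizer='adam')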